In [ ]:
#!/usr/bin/python
#coding=utf-8
import numpy as np
import pandas as pd
from openpyxl import load_workbook
from xlrd import open_workbook
import csv
from csv import reader
import re
import os
from matplotlib import pyplot
from matplotlib import style
style.use("ggplot")
#%matplotlib notebook
import sys
import operator
import datetime
import time
import matplotlib.pyplot as plt
In [ ]:
def diffdates(d1, d2):
    #Date format: %Y-%m-%d %H:%M:%S
    return (time.mktime(time.strptime(d2,'%Y-%m-%d %H:%M:%S')) - time.mktime(time.strptime(d1,'%Y-%m-%d %H:%M:%S')))
In [ ]:
filename1 = 'bigdata_correct.xlsx'
df = pd.read_excel(filename1, sheetname='Data')
df = df[df.IdentNo.str[-2:] == "_G"]
In [ ]:
# Add two more columns
print ("start")
df = df.values
a,b = df.shape
z = np.zeros((a,2))
df = np.hstack((df,z))
a,b = df.shape
print ("end")
In [ ]:
#listing all unique products
print ("start")
varia = 0
list1 = []
while (varia < a):
    vari = str(df[varia][2]) + "_" + str(df[varia][3])
    t = len(list1)
    flag = 0
    for i in range(t):
        if list1[i] == vari:
            flag = 1
            break
    if (flag == 0):
        list1.append(vari)
    varia += 1
unique_sets = len(list1)
print (list1)
print (unique_sets)
print ("end")
In [ ]:
#assigning product number depending on TTNR to the data-set
print ("start")
for i in range(a):
    concat= str(df[i][2]) + "_" + str(df[i][3])
    for ii in range(unique_sets):
        if (list1[ii] == concat):
            df[i][4] = ii + 1
            break
print ("end")
In [ ]:
print ("start")
timers = 180
threshold_analysis = []
individual_variations = []

threshold_analysis.append([])
flag = 0
a,b = df.shape
count = 0
value_prev = "0"
date_prev = "0"
    
final_array = []
total_size = 0
for i in range(a):
    total_size += 1
    if flag == 0:
        final_array.append([])
        final_array[count].append(df[i][4])
        final_array[count].append(df[i][1])
        total_size = 1
        flag = 1
    elif df[i][4] != value_prev:
        final_array[count].append(date_prev)
        final_array[count].append(total_size - 1)
        count += 1
        final_array.append([])
        final_array[count].append(df[i][4])
        final_array[count].append(df[i][1])
        total_size = 1
    else:
        t1 = pd.to_datetime(date_prev)
        t2 = pd.to_datetime(df[i][1])
        time = pd.Timedelta(t2 - t1).seconds + pd.Timedelta(t2 - t1).days*24*60*60
        if (time > timers):
            final_array[count].append(date_prev)
            final_array[count].append(total_size - 1)
            count = count + 1
            final_array.append([])
            final_array[count].append(df[i][4])
            final_array[count].append(df[i][1])
            total_size = 1
    df[i][5] = count + 1
    value_prev = df[i][4]
    date_prev = df[i][1]
    if (i == a-1):
        final_array[count].append(date_prev)
        final_array[count].append(total_size - 1)

print (final_array)
print (type(final_array))
np.savetxt("product_type_correct.csv", df, fmt="%s", delimiter=",")
np.savetxt("timelist_correct.csv", final_array, fmt="%s", delimiter=",")
In [ ]:
length_list = len(final_array)
print (length_list)

buckets = [0] * (unique_sets+1)
for i in range(length_list):
    a = final_array[i][0]
    buckets[a] +=  1

print (buckets)
In [ ]:
###Code for generating file, alerady generated so using the generated one
filename2 = 'analyses_data_corrected.xlsx'
pro_data = pd.read_excel(filename2, sheetname='RealtimeData')
pro_data['TIME'] = pd.to_datetime(pro_data['TIME'])

pro_data = pro_data.values
a,b = pro_data.shape
z = np.zeros((a,2))
pro_data = np.hstack((pro_data,z))
a,b = pro_data.shape

i = 0
print ("start")
while (i < a):
    t3 = pro_data[i][0]
    flag = 0
    j = 0
    while (j < length_list):
        t1 = pd.to_datetime(final_array[j][1])
        t2 = pd.to_datetime(final_array[j][2])
        time1 = pd.Timedelta(t2 - t3).seconds + pd.Timedelta(t2 - t3).days*24*60*60
        time2 = pd.Timedelta(t3 - t1).seconds + pd.Timedelta(t3 - t1).days*24*60*60
        time3 = pd.Timedelta(t2 - t1).seconds + pd.Timedelta(t2 - t1).days*24*60*60
        if (time1 >= 0 and time2 >= 0 and time3 >= 0 and (time1+time2 == time3)):
            pro_data[i][b-2] = final_array[j][0]
            pro_data[i][b-1] = j
            flag = 1
            break
        j += 1
    if (flag == 0):
        pro_data = np.delete(pro_data, i, 0)
        i = i - 1
        a,b = pro_data.shape
    i += 1
    
df = pd.DataFrame(pro_data)
filepath = 'my_excel_file_corrected.xlsx'
df.to_excel(filepath, index=False)
In [ ]:
########################NEWWWWWWWWWWWWWW########
filename4 = 'my_excel_file1_corrected.xlsx'
jj = []
counts = 0
print (len(final_array))
for i in range(len(final_array)):
    print (i)
    df1 = pd.read_excel(filename4, sheetname='Sheet1')
    df1 = df1[["TIME","ACTIVEPOWER","PRODUCT_TYPE", "BATCH_TYPE", "REALTIME_E"]]
    df1 = df1[df1.BATCH_TYPE == i]
    df1 = df1.values
    a,b = df1.shape
    if (a == 0):
        continue
    counts += 1
    p = df1[a-1][4]
    q = df1[0][4]
    energy = p - q
    quantitites_value = final_array[i][3]
    e_by_q = (energy*1.00)/quantitites_value
    jj.append([])
    jj[counts-1].append(final_array[i][0])
    jj[counts-1].append(e_by_q)
######new end##########################
print (jj)
In [ ]:
jjj = pd.DataFrame(jj)
print (jjj)
In [ ]:
#jjj.to_excel('e_by_q.xlsx', index=False)
In [ ]:
'''
filename3 = 'e_by_q.xlsx'
for i in range(unique_sets):
    #colors = np.random.rand(i)
    df1 = pd.read_excel(filename3, sheetname='Sheet1')
    #df1 = df1[df1.PRODUCT_TYPE == i]
    y_label = "Energy difference divided by quantity kWh"
    plt.figure(1, figsize=(10,10))
    plt.subplot(211)
    #plt.plot(df1['PRODUCT_TYPE'], df1['E_by_Q'], 'coral', label='positive error')
    plt.scatter(df1['PRODUCT_TYPE'], df1['E_by_Q'], alpha=0.5)
    #plt.title("product type" + str(i))
    plt.xlabel('product number')
    plt.ylabel(y_label)
    plt.legend()
    plt.tight_layout()
    plt.show()
    break
'''
In [26]:
filename3 = 'e_by_q.xlsx'
for i in range(unique_sets):
    #colors = np.random.rand(i)
    df1 = pd.read_excel(filename3, sheetname='Sheet1')
    #df1 = df1[df1.PRODUCT_TYPE == i]
    y_label = "Energy difference divided by quantity kWh"
    #plt.figure(1, figsize=(10,10))
    #plt.subplot(211)
    #plt.plot(df1['PRODUCT_TYPE'], df1['E_by_Q'], 'coral', label='positive error')
    #plt.scatter(df1['PRODUCT_TYPE'], df1['E_by_Q'], alpha=0.5)
    #plt.title("product type" + str(i))
    #plt.xlabel('product number')
    #plt.ylabel(y_label)
    fig, ax = plt.subplots(figsize=(20, 20))
    ax.scatter(df1['PRODUCT_TYPE'], df1['E_by_Q'], alpha=0.5)
    ax.set_xlabel('product number')
    ax.set_ylabel(y_label)
    ax.set_xticks(np.arange(unique_sets + 1))
    #for tick in ax.get_xticklabels():
    #   tick.set_visible(True)
    plt.legend()
    plt.tight_layout()
    plt.show()
    break
    
In [27]:
#filename3 = 'POWER_TTNR.xlsx'
filename3 = 'my_excel_file1_corrected.xlsx'

i = 1

############code addded on 22/6/2018#######
print (list1)
mean_standard = []
quantities_n = []
#########end############

#fig, axs = plt.subplots(unique_sets,1,figsize=(100,300))
#plot_count = 0
while (i <= unique_sets):
    df1 = pd.read_excel(filename3, sheetname='Sheet1')
    df1 = df1[df1.PRODUCT_TYPE == i]
    df1 = df1[["TIME","ACTIVEPOWER","PRODUCT_TYPE", "BATCH_TYPE"]]
    x = df1.BATCH_TYPE.unique()
    
    ################Code started for plotting graph ###################
    '''
    y_label = "Real Time Power of Product " + str(i)
    plt.subplot(221)
    plt.plot(df1['TIME'], df1['ACTIVEPOWER'])
    plt.title(str(i))
    plt.xlabel('time-stamp')
    plt.ylabel(y_label)
    plt.show()
    '''
    ####################Code ended for plotting graph###########
    
    #df1 = df1.values
    #a,b = df1.shape
    
    jj = []
    for kk in range(len(x)):
        jj.append([])
        df2 = df1[df1.BATCH_TYPE == x[kk]]
        df2 = df2.values
        aa, bb = df2.shape
        ref_time = 0
        for ll in range(aa):
            jj[kk].append(df2[ll][1])
    
    jj = pd.DataFrame(jj)
    median_var = jj.median()
    standard_deviation_from_mean_value = jj.std()
    max_value = jj.max()
    min_value = jj.min()
    mean_value = jj.mean()
    kkkkkkk = pd.Series.tolist(median_var)
    kkkkkkk1 = pd.Series.tolist(standard_deviation_from_mean_value)
    kkkkkkk1_numpy = np.asarray(kkkkkkk1)
    ppp = kkkkkkk1_numpy.size
    countttt_batch_size = 0
    for variables in range(ppp):
        if np.isnan(kkkkkkk1_numpy[variables]):
            countttt_batch_size += 1
    mean_std_dev = np.nanmean(kkkkkkk1_numpy)
    mean_standard.append(mean_std_dev)
    quantities_n.append(ppp - countttt_batch_size)
    print("standard deviation is " + str(mean_std_dev))
    kkkkkkk2 = pd.Series.tolist(max_value)
    kkkkkkk3 = pd.Series.tolist(min_value)
    kkkkkkk4 = pd.Series.tolist(mean_value)
    kkkkkkk5 = [b_i - a_i for a_i, b_i in zip(kkkkkkk1, kkkkkkk4)]
    kkkkkkk6 = [b_i + a_i for a_i, b_i in zip(kkkkkkk1, kkkkkkk4)]
    y_a = list(range(0, len(kkkkkkk)))
    y_label = "Real Time Power of Product " + str(i) + " kW"
    plt.figure(1, figsize=(20,10))
    plt.subplot(211)
    plt.plot(y_a, kkkkkkk, 'r', label='median/predicted')
    #plt.plot(y_a, kkkkkkk1, 'k', label='standard deviation from mean value')
    plt.plot(y_a, kkkkkkk5, 'brown', label='negative error')
    plt.plot(y_a, kkkkkkk6, 'coral', label='positive error')
    plt.title("product type" + str(i))
    plt.xlabel('time-stamp in minutes')
    plt.ylabel(y_label)
    plt.legend()
    plt.tight_layout()
    plt.subplot(212)
    plt.plot(y_a, kkkkkkk4, 'bx', label='mean power')
    plt.plot(y_a, kkkkkkk2, 'y', label='maximum power')
    plt.plot(y_a, kkkkkkk3, 'g', label='minimum power')
    plt.title("product type" + str(i))
    plt.xlabel('time-stamp in minutes')
    plt.ylabel(y_label)
    plt.legend()
    plt.tight_layout()
    plt.show()
    
    ##########second phase of code ending#########
    i += 1
    #plot_count += 1

#print (type(mean_standard))
print (mean_standard)
print (quantities_n)
##list1
###mean_standard
###quantities_n


print (mean_standard)
print (quantities_n)
print (list1)
list1_new = []
mean_standard_new = []
quantities_n_new = []
product_number = []
for i in range(len(mean_standard)):
    if str(mean_standard[i]) != 'nan':
        mean_standard_new.append(mean_standard[i])
        quantities_n_new.append(quantities_n[i])
        list1_new.append(list1[i])
        product_number.append(i)

mean_standard = mean_standard_new
quantities_n = quantities_n_new
list1 = list1_new
print (mean_standard)
print (quantities_n)
print (list1)
print (product_number)
width = 0.5
y_pos = np.arange(len(list1))
fig, ax = plt.subplots(figsize=(80, 40))
rects = ax.bar(y_pos, mean_standard, width ,color='r', align='edge' ,alpha=0.5)
ax.set_xticks(y_pos + width/2.)
ax.set_xticklabels(list1, fontsize=10)
ax.set_xlabel('TTNR')
ax.set_ylabel('standard deviation')
plt.legend()
def autolabel(rects,ii):
    for rect in rects:
        height = rect.get_height()
        values = quantities_n[ii]
        value1 = product_number[ii]
        value2 = mean_standard[ii]
        ii += 1
        ax.text(rect.get_x() + rect.get_width()/2., 1.08*height,
                '%s & prod. %d & std %f' %(str(values) , value1, value2),
                ha='center', va='bottom', fontsize=40)
autolabel(rects, 0)
plt.tight_layout()
plt.show()
['116_1038408484', '115_1038307052', '116_1038307052', '115_1038305585', '116_1038305585', '115_1038408318', '116_1038408318', '115_1038408484', '115_1038305594', '115_1038407804', '116_1038305594', '116_1038407804', '115_1038407806', '116_1038407806', '115_1038408272', '115_1038408104', '116_1038408272', '116_1038408104', '115_1038407334', '115_F005V16903', '116_1038407334', '115_1038408462', '116_1038408462', '115_1038407754', '116_1038407754', '115_1038407332', '116_1038407332', '115_1038408840', '116_1038408840', '115_1038305359', '116_1038305359']
standard deviation is 3.49936124956
standard deviation is 3.68703065716
standard deviation is 3.74880587682
standard deviation is 3.03753382962
standard deviation is 3.5994465014
standard deviation is 3.7842768957
standard deviation is 3.76760705638
standard deviation is 3.93105038429
standard deviation is 2.94690317458
standard deviation is 3.46660737528
standard deviation is 3.86932682109
standard deviation is 3.19571440861
standard deviation is 3.21471392671
standard deviation is 3.41499956132
standard deviation is 2.73417698283
standard deviation is 3.8423087534
standard deviation is 3.74496777538
standard deviation is 3.61773649728
standard deviation is 3.99828831384
standard deviation is 2.97462522947
standard deviation is 3.51180720468
standard deviation is 3.53286277084
standard deviation is 3.8769922273
standard deviation is 3.01843345583
standard deviation is 3.55200832146
C:\Program Files\Anaconda3\lib\site-packages\numpy\lib\nanfunctions.py:703: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
standard deviation is nan
C:\Program Files\Anaconda3\lib\site-packages\numpy\lib\nanfunctions.py:703: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
standard deviation is nan
standard deviation is 3.59222072684
standard deviation is 3.21508449045
standard deviation is 2.43887891229
standard deviation is 2.84117539626
standard deviation is 4.15791921042
standard deviation is 1.94461638021
[3.4993612495580368, 3.6870306571611344, 3.7488058768225527, 3.0375338296172174, 3.5994465013966237, 3.784276895700394, 3.7676070563774253, 3.9310503842926572, 2.9469031745838019, 3.4666073752802822, 3.8693268210946399, 3.1957144086139548, 3.2147139267096527, 3.4149995613224315, 2.7341769828343323, 3.8423087534031906, 3.744967775378214, 3.6177364972793216, 3.9982883138418761, 2.9746252294687774, 3.5118072046767335, 3.532862770841172, 3.8769922273043442, 3.0184334558342241, 3.5520083214647347, nan, nan, 3.5922207268351505, 3.2150844904493332, 2.438878912286393, 2.8411753962604567, 4.1579192104221345, 1.944616380212306]
[66, 54, 61, 92, 88, 75, 89, 103, 66, 75, 57, 46, 48, 53, 26, 64, 13, 52, 38, 81, 42, 37, 28, 47, 57, 0, 0, 15, 24, 49, 51, 20, 14]
[3.4993612495580368, 3.6870306571611344, 3.7488058768225527, 3.0375338296172174, 3.5994465013966237, 3.784276895700394, 3.7676070563774253, 3.9310503842926572, 2.9469031745838019, 3.4666073752802822, 3.8693268210946399, 3.1957144086139548, 3.2147139267096527, 3.4149995613224315, 2.7341769828343323, 3.8423087534031906, 3.744967775378214, 3.6177364972793216, 3.9982883138418761, 2.9746252294687774, 3.5118072046767335, 3.532862770841172, 3.8769922273043442, 3.0184334558342241, 3.5520083214647347, nan, nan, 3.5922207268351505, 3.2150844904493332, 2.438878912286393, 2.8411753962604567, 4.1579192104221345, 1.944616380212306]
[66, 54, 61, 92, 88, 75, 89, 103, 66, 75, 57, 46, 48, 53, 26, 64, 13, 52, 38, 81, 42, 37, 28, 47, 57, 0, 0, 15, 24, 49, 51, 20, 14]
['116_1038408484', '115_1038307052', '116_1038307052', '115_1038305585', '116_1038305585', '115_1038408318', '116_1038408318', '115_1038408484', '115_1038305594', '115_1038407804', '116_1038305594', '116_1038407804', '115_1038407806', '116_1038407806', '115_1038408272', '115_1038408104', '116_1038408272', '116_1038408104', '115_1038407334', '115_F005V16903', '116_1038407334', '115_1038408462', '116_1038408462', '115_1038407754', '116_1038407754', '115_1038407332', '116_1038407332', '115_1038408840', '116_1038408840', '115_1038305359', '116_1038305359']
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-27-5cd96c77c1b8> in <module>()
    113         mean_standard_new.append(mean_standard[i])
    114         quantities_n_new.append(quantities_n[i])
--> 115         list1_new.append(list1[i])
    116         product_number.append(i)
    117 

IndexError: list index out of range
In [ ]: